# Script to run moving window regressions and produce figure
library(dplyr)
library(magrittr)
library(tidyr)
library(ggplot2)

dat <- rio::import("data/Demo_book_country_3.dta")

spat <- dat %>%
  select(country_id, year, v2x_polyarchy_imp_100, portdist_natural_km)

# don't have the dates quite right here...
regs_df <- data.frame(start_year = 1800:1990,
                      end_year = 1829:2019,
                      coef = NA,
                      se = NA,
                      t_value = NA,
                      N = NA,
                      r2 = NA, 
                      series = "all")

c_regs_df <- data.frame(start_year = 1800:1990,
                        end_year = 1829:2019,
                        coef = NA,
                        se = NA,
                        t_value = NA,
                        N = NA,
                        r2 = NA, 
                        series = "continuous")

keep_group <- spat %>%
  filter(year == 1900 & !is.na(v2x_polyarchy_imp_100) & !is.na(portdist_natural_km))
keep_group <- levels(as.factor(keep_group$country_id))

continuous_df <- spat %>%
  filter(country_id %in% keep_group) 

for(i in 1:nrow(regs_df)){
  subset_data <- spat[which(spat$year %in% c(regs_df$start_year[i]:regs_df$end_year[i])), ]
  reg <- miceadds::lm.cluster(v2x_polyarchy_imp_100 ~ portdist_natural_km + as.factor(year), data = subset_data, cluster = "country_id")
  reg_sum <- summary(reg)
  regs_df$coef[i] <- reg_sum[2, 1]
  regs_df$se[i] <- reg_sum[2, 2]
  regs_df$t_value[i] <- reg_sum[2, 3]
  regs_df$N[i] <- nrow(reg$lm_res$model)
  regs_df$r2[i] <- summary(reg$lm_res)$r.squared
}
for(i in 1:nrow(c_regs_df)){
  s2 <- continuous_df[which(continuous_df$year %in% c(c_regs_df$start_year[i]:c_regs_df$end_year[i])), ]
  r2 <- miceadds::lm.cluster(v2x_polyarchy_imp_100 ~ portdist_natural_km + as.factor(year), data = s2, cluster = "country_id")
  reg_sum <- summary(r2)
  c_regs_df$coef[i] <- reg_sum[2, 1]
  c_regs_df$se[i] <- reg_sum[2, 2]
  c_regs_df$t_value[i] <- reg_sum[2, 3]
  c_regs_df$N[i] <- nrow(r2$lm_res$model)
  c_regs_df$r2[i] <- summary(r2$lm_res)$r.squared
}

regs_df$year <- regs_df$end_year - 15

g <- ggplot(regs_df, aes(x = year)) +
  geom_ribbon(aes(ymin = coef - se, ymax = coef + se), color = 'lightgray', alpha = 0.25) +
  geom_point(aes(y = coef)) +
  geom_hline(yintercept = 0) +
  #scale_y_continuous(limits = c(-.0047, 0)) +
  scale_x_continuous(breaks = seq(1800, 2025, by = 25)) +
  labs(x = "Year", y = "Coefficient estimates for Natural Harbor Distance") +
  scale_color_manual(name = "Sample",
                     labels = c("All\nobservations\n"),
                     values = c("#E69F00")) +
  theme(axis.text.x = element_text(face = ifelse(regs_df$coef == 0,
                                                 "bold",
                                                 "plain")),
        panel.grid.minor = element_blank(), 
        panel.grid.major = element_blank(),
        panel.border = element_blank(),
        panel.background = element_blank(),
        axis.line = element_line(color = "black"))

## ggsave(plot = g, filename = "figure_6_3.png",
##        width = 11, height = 8.5)

ggsave(plot = g, filename = "output/figure_6_3.tiff",
       width = 11, height = 8.5, dpi = 300)
